xssssss— title: “Hosp Data Inspection” output: html_document date: “2024-09-02” —

Visualizing Outcome by State

p <- lapply(unique(national_hosps$state), function(state_nm){
  ggplot(data = national_hosps %>% filter(state == state_nm), 
         aes(x = date, y = hospitalizations, color = state, group = state)) + 
    geom_point() + 
    labs(title = paste0('State: ', state_nm))
})
for(set in seq(1, length(p), 4)){
  do.call(grid.arrange,p[set:min(set+3, length(p))])
}

Visualizing Inputs

Wastewater Data

full_scrape$NWSS_Wastewater_Metric %>% head()
## # A tibble: 6 × 16
##   wwtp_jurisdiction wwtp_id reporting_jurisdiction sample_location       
##   <chr>               <dbl> <chr>                  <chr>                 
## 1 Michigan              889 Michigan               Before treatment plant
## 2 Michigan              889 Michigan               Before treatment plant
## 3 Michigan              889 Michigan               Before treatment plant
## 4 Michigan              889 Michigan               Before treatment plant
## 5 Michigan              889 Michigan               Before treatment plant
## 6 Michigan              889 Michigan               Before treatment plant
## # ℹ 12 more variables: sample_location_specify <dbl>, key_plot_id <chr>,
## #   county_names <chr>, county_fips <chr>, population_served <dbl>,
## #   date_start <date>, date_end <date>, ptc_15d <dbl>, detect_prop_15d <dbl>,
## #   percentile <dbl>, sampling_prior <chr>, first_sample_date <date>
nwss <- full_scrape$NWSS_Wastewater_Metric %>%
  select(date = date_end, state = wwtp_jurisdiction, sample_location, detect_prop_15d) %>% 
  group_by(date, state, sample_location) %>%
  summarise(avg_detect = mean(detect_prop_15d, na.rm = T), .groups = "keep")

nwss_plots <- lapply(unique(nwss$state), function(state_nm){
  ggplot(data = nwss %>% filter(state == state_nm), 
         aes(x = date, y = avg_detect, group = sample_location, color = sample_location)) + 
    geom_point() + 
    labs(title = paste0('State: ', state_nm))
})
for(set in seq(1, length(nwss_plots), 4)){
  do.call(grid.arrange,nwss_plots[set:min(set+3, length(nwss_plots))])
}
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 411 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 11 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 559 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 144 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 540 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 21 rows containing missing values or values outside the scale range
## (`geom_point()`).

NSSP ED Visits

full_scrape$NSSP_ED_Visit_Trajectory %>% head()
## # A tibble: 6 × 20
##   week_end            geography     county percent_visits_combined
##   <dttm>              <chr>         <chr>                    <dbl>
## 1 2022-10-01 00:00:00 United States All                       2.84
## 2 2022-10-08 00:00:00 United States All                       2.92
## 3 2022-10-15 00:00:00 United States All                       3.24
## 4 2022-10-22 00:00:00 United States All                       3.71
## 5 2022-10-29 00:00:00 United States All                       5.1 
## 6 2022-11-05 00:00:00 United States All                       6.46
## # ℹ 16 more variables: percent_visits_covid <dbl>,
## #   percent_visits_influenza <dbl>, percent_visits_rsv <dbl>,
## #   percent_visits_smoothed <dbl>, percent_visits_smoothed_covid <dbl>,
## #   percent_visits_smoothed_1 <dbl>, percent_visits_smoothed_rsv <dbl>,
## #   ed_trends_covid <chr>, ed_trends_influenza <chr>, ed_trends_rsv <chr>,
## #   hsa <chr>, hsa_counties <chr>, hsa_nci_id <chr>, fips <dbl>,
## #   trend_source <chr>, buildnumber <lgl>
nssp <- full_scrape$NSSP_ED_Visit_Trajectory %>% filter(county == "All") %>% 
  select(date = week_end, state = geography, percent_visits_covid)

nssp_plots <- lapply(unique(nssp$state), function(state_nm){
  ggplot(data = nssp %>% filter(state == state_nm), 
         aes(x = date, y = percent_visits_covid, group = state)) + 
    geom_point() + 
    labs(title = paste0('State: ', state_nm))
})
for(set in seq(1, length(nssp_plots), 4)){
  do.call(grid.arrange,nssp_plots[set:min(set+3, length(nssp_plots))])
}

NRVESS Test Data

full_scrape$NRVESS_Test_Positivity %>% head()
## # A tibble: 6 × 10
##   level     perc_diff percent_pos percent_pos_2_week percent_pos_4_week
##   <chr>         <dbl>       <dbl>              <dbl>              <dbl>
## 1 National       -2.7         9.4               13.1               13.1
## 2 Region 1       -9.7         8.5               18.1               17.7
## 3 Region 10     -10.4         6.8               20.9               20.9
## 4 Region 2        0.9        14.8               13.8               13.9
## 5 Region 3       -0.9        13.4               14                 13.8
## 6 Region 4        1.1        10.1                8.7                8.9
## # ℹ 5 more variables: number_tested <dbl>, number_tested_2_week <dbl>,
## #   number_tested_4_week <dbl>, posted <dttm>, mmwrweek_end <dttm>
nvss_tp <- full_scrape$NRVESS_Test_Positivity %>% 
  select(date = mmwrweek_end, level, percent_pos, number_tested)

nvss_tp_plots <- lapply(unique(nvss_tp$level), function(level_nm){
  ggplot(data = nvss_tp %>% filter(level == level_nm), 
         aes(x = date, y = percent_pos, group = level)) + 
    geom_point() + 
    labs(title = paste0('Level: ', level_nm))
})
for(set in seq(1, length(nvss_tp_plots), 3)){
  do.call(grid.arrange,nvss_tp_plots[set:min(set+2, length(nvss_tp_plots))])
}

MakeMyTestCount Self Test Data

full_scrape$MakeMyTestCount %>% head()
## # A tibble: 6 × 11
##   date                state state_name state_fips fema_region age_group   race  
##   <dttm>              <chr> <chr>      <chr>      <chr>       <chr>       <chr> 
## 1 2022-11-27 00:00:00 OK    Oklahoma   40         Region 6    18-29 Years Black 
## 2 2022-11-27 00:00:00 NY    New York   36         Region 2    65-74 Years White 
## 3 2022-11-27 00:00:00 CO    Colorado   08         Region 8    30-39 Years More …
## 4 2022-11-27 00:00:00 OR    Oregon     41         Region 10   40-49 Years White 
## 5 2022-12-04 00:00:00 AZ    Arizona    04         Region 9    40-49 Years White 
## 6 2022-12-18 00:00:00 OK    Oklahoma   40         Region 6    50-64 Years White 
## # ℹ 4 more variables: ethnicity <chr>, sex <chr>, test_result <chr>,
## #   total_tests <dbl>
mmtc <- full_scrape$MakeMyTestCount %>% 
  select(date, state, test_result, total_tests) %>%
  group_by(date, state, test_result) %>%
  summarise(total_tests = sum(total_tests, na.rm = T)) %>%
  ungroup() %>%
  pivot_wider(names_from = test_result, values_from = total_tests, 
              values_fill = 0) %>%
  rowwise() %>%
  mutate(pct_positive = Positive/(Positive + Negative))
## `summarise()` has grouped output by 'date', 'state'. You can override using the
## `.groups` argument.
mmtc_plots <- lapply(unique(mmtc$state), function(state_nm){
  ggplot(data = mmtc %>% filter(state == state_nm), 
         aes(x = date, y = pct_positive, group = state)) + 
    geom_point() + 
    labs(title = paste0('State: ', state_nm))
})
for(set in seq(1, length(mmtc_plots), 4)){
  do.call(grid.arrange,mmtc_plots[set:min(set+3, length(mmtc_plots))])
}

Variant Proprtions

  full_scrape$NRVESS_Var_Props %>% 
  filter(modeltype == "weighted", count_lt10 %in% c('0', '0.0'), 
         week_ending >= '2022-07-01', time_interval == 'biweekly') %>% 
  head()
## # A tibble: 6 × 10
##   usa_or_hhsregion week_ending         variant      share share_hi share_lo     
##   <chr>            <dttm>              <chr>        <dbl>    <dbl> <chr>        
## 1 USA              2022-07-09 00:00:00 BA.2      0.0355   0.0385   0.0325906798…
## 2 USA              2022-07-09 00:00:00 BA.2.12.1 0.196    0.203    0.1893576979…
## 3 USA              2022-07-09 00:00:00 BA.2.75   0.000275 0.000485 0.0001407369…
## 4 USA              2022-07-09 00:00:00 BA.4      0.131    0.138    0.1253782510…
## 5 USA              2022-07-09 00:00:00 BA.4.6    0.0200   0.0249   0.0158076975…
## 6 USA              2022-07-09 00:00:00 BA.5      0.615    0.624    0.6053490042…
## # ℹ 4 more variables: count_lt10 <chr>, modeltype <chr>, time_interval <chr>,
## #   creation_date <dttm>
var_props <- full_scrape$NRVESS_Var_Props %>% 
  filter(modeltype == "weighted", count_lt10 %in% c('0', '0.0'), 
         week_ending >= '2022-07-01', time_interval == 'biweekly', 
         creation_date == max(creation_date)) %>%
  select(date = week_ending, usa_or_hhsregion, variant, share) %>%
  mutate(variant = gsub( "(^[^.]+[.][^.]+)(.+$)", "\\1", variant)) %>%
  group_by(date, usa_or_hhsregion, variant) %>%
  summarise(share = sum(share, na.rm = T))
## `summarise()` has grouped output by 'date', 'usa_or_hhsregion'. You can
## override using the `.groups` argument.
ggplot(data = var_props, 
         aes(x = date, y = share, fill = variant)) + 
    geom_bar(position="stack", stat="identity") + 
    facet_wrap(usa_or_hhsregion ~ .)